In [8]:
import plotly.offline as py
from plotly.graph_objs import *
import plotly.graph_objs as go
import numpy as np
import pandas as pd
import warnings
from plotly.tools import FigureFactory as ff
import colorlover as cl
import skbio
from skbio.alignment import global_pairwise_align_nucleotide
from skbio.sequence import DNA
import pandas as pd
import itertools
import numpy as np
from skbio.sequence.distance import hamming
from skbio.stats.distance import DistanceMatrix
from scipy.spatial.distance import pdist, squareform
import plotly.express as px

warnings.filterwarnings('ignore')
In [2]:
trace1 = {
  "uid": "ec0c5770-e86c-4231-9350-c8a5424aeeef", 
  "mode": "markers", 
  "type": "scatter", 
  "x": [-2.330078922787911, -2.0578559805954546, -1.8851770324320443, -1.75530050130824, -1.649672679353478, -1.5597799921032536, -1.480972651368176, -1.4104195313382353, -1.3462626652319192, -1.2872137328173294, -1.2323408611117501, -1.1809470407966427, -1.132496529618965, -1.086568114986069, -1.0428242390384295, -1.0009899168818812, -0.9608379310031608, -0.9221781782775869, -0.8848498412982431, -0.8487155274221447, -0.8136568081151939, -0.7795707737384843, -0.746367337187046, -0.713967098197978, -0.6822996332113872, -0.6513021122615636, -0.6209181700422963, -0.5910969765728049, -0.5617924660992517, -0.5329626925342973, -0.5045692868981857, -0.4765769975882356, -0.44895329836199876, -0.4216680520195305, -0.3946932201593915, -0.3680026112393317, -0.341571660625871, -0.3153772374626603, -0.28939747409646166, -0.2636116145249011, -0.23799987891187127, -0.212543341685061, -0.18722382110885646, -0.162023778532748, -0.13692622576424984, -0.11191463921698815, -0.08697287964740107, -0.06208511642395301, -0.03723575537593384, -0.012409369348679852, 0.012409369348679852, 0.03723575537593384, 0.06208511642395301, 0.0869728796474012, 0.11191463921698828, 0.13692622576424998, 0.16202377853274785, 0.1872238211088563, 0.21254334168506087, 0.23799987891187127, 0.2636116145249011, 0.28939747409646166, 0.3153772374626603, 0.341571660625871, 0.3680026112393317, 0.3946932201593915, 0.4216680520195306, 0.448953298361999, 0.4765769975882358, 0.5045692868981856, 0.5329626925342971, 0.5617924660992515, 0.5910969765728049, 0.6209181700422963, 0.6513021122615636, 0.6822996332113872, 0.713967098197978, 0.746367337187046, 0.7795707737384843, 0.8136568081151939, 0.848715527422145, 0.8848498412982432, 0.9221781782775866, 0.9608379310031608, 1.0009899168818812, 1.0428242390384295, 1.086568114986069, 1.132496529618965, 1.1809470407966427, 1.2323408611117501, 1.2872137328173294, 1.3462626652319198, 1.4104195313382353, 1.480972651368176, 1.5597799921032531, 1.6496726793534775, 1.75530050130824, 1.8851770324320436, 2.0578559805954537, 2.3300789227879104], 
  "y": [39.34143945759198, 40.02803115129471, 40.11135859671047, 40.22743939999248, 40.71907259694426, 41.28313852050546, 42.27299853944366, 42.458392564183924, 42.90721985388303, 44.31698894084478, 44.60097557107362, 44.77433731242781, 45.17467164741618, 45.30283320118724, 45.97704428865023, 46.00995591163878, 46.230535196261506, 46.34015248999396, 46.399572196405515, 46.51094984764143, 47.2534549294726, 47.30060464333649, 47.35351959500256, 47.49135550202566, 47.553313915576744, 47.5895343058226, 47.61928992536331, 48.18750408422179, 48.1890977642888, 48.24564054280064, 48.30429876968282, 48.31183831457235, 48.6437600601711, 48.663414056632156, 48.83908872027955, 48.886131504485, 48.92105053834739, 48.93651180644744, 49.1269989470353, 49.2628987113649, 49.39047154419056, 49.4386376440292, 49.59438908091527, 49.670760761443994, 49.69043985253798, 49.75584744330624, 49.87047331591907, 49.95808075035739, 50.021457154670166, 50.44793806294758, 50.49574607917622, 50.54274262857485, 50.66354147879976, 50.67568439224318, 50.97506639616547, 51.01790398663739, 51.0863257255902, 51.1431506506233, 51.22271988323995, 51.3275579284606, 51.33035082000276, 51.44547101901968, 51.56084968156611, 51.57376889405867, 51.59678210545612, 51.966706085408696, 52.00104994126174, 52.16513094976799, 52.22568806415174, 52.304514508358594, 52.82576334828314, 53.06602092429819, 53.10300331504307, 53.1066798694524, 53.301157756131566, 53.54080009963302, 53.559795082599194, 53.57639487199203, 54.134993113311324, 54.21112369191328, 54.587294687552145, 54.59134575739708, 54.9453622874972, 55.14137038991352, 55.23091428404132, 55.538541173913245, 55.618456267047115, 55.64392576554938, 56.01518686906106, 56.28236131877578, 56.54236540431739, 56.65793252064759, 56.98998188546788, 57.42268500918291, 58.36311106653914, 58.72407074169651, 59.92542295712866, 61.92483665355549, 61.97351832442268, 62.338255282173016], 
  "marker": {"color": "#19d3f3"}
}
trace2 = {
  "uid": "d1dcb3a4-a6c7-44f3-b911-c59c66739751", 
  "line": {"color": "#636efa"}, 
  "mode": "lines", 
  "type": "scatter", 
  "x": [-2.330078922787911, -2.0578559805954546, -1.8851770324320443, -1.75530050130824, -1.649672679353478, -1.5597799921032536, -1.480972651368176, -1.4104195313382353, -1.3462626652319192, -1.2872137328173294, -1.2323408611117501, -1.1809470407966427, -1.132496529618965, -1.086568114986069, -1.0428242390384295, -1.0009899168818812, -0.9608379310031608, -0.9221781782775869, -0.8848498412982431, -0.8487155274221447, -0.8136568081151939, -0.7795707737384843, -0.746367337187046, -0.713967098197978, -0.6822996332113872, -0.6513021122615636, -0.6209181700422963, -0.5910969765728049, -0.5617924660992517, -0.5329626925342973, -0.5045692868981857, -0.4765769975882356, -0.44895329836199876, -0.4216680520195305, -0.3946932201593915, -0.3680026112393317, -0.341571660625871, -0.3153772374626603, -0.28939747409646166, -0.2636116145249011, -0.23799987891187127, -0.212543341685061, -0.18722382110885646, -0.162023778532748, -0.13692622576424984, -0.11191463921698815, -0.08697287964740107, -0.06208511642395301, -0.03723575537593384, -0.012409369348679852, 0.012409369348679852, 0.03723575537593384, 0.06208511642395301, 0.0869728796474012, 0.11191463921698828, 0.13692622576424998, 0.16202377853274785, 0.1872238211088563, 0.21254334168506087, 0.23799987891187127, 0.2636116145249011, 0.28939747409646166, 0.3153772374626603, 0.341571660625871, 0.3680026112393317, 0.3946932201593915, 0.4216680520195306, 0.448953298361999, 0.4765769975882358, 0.5045692868981856, 0.5329626925342971, 0.5617924660992515, 0.5910969765728049, 0.6209181700422963, 0.6513021122615636, 0.6822996332113872, 0.713967098197978, 0.746367337187046, 0.7795707737384843, 0.8136568081151939, 0.848715527422145, 0.8848498412982432, 0.9221781782775866, 0.9608379310031608, 1.0009899168818812, 1.0428242390384295, 1.086568114986069, 1.132496529618965, 1.1809470407966427, 1.2323408611117501, 1.2872137328173294, 1.3462626652319198, 1.4104195313382353, 1.480972651368176, 1.5597799921032531, 1.6496726793534775, 1.75530050130824, 1.8851770324320436, 2.0578559805954537, 2.3300789227879104], 
  "y": [39.130691799051924, 40.44694345743266, 41.28188025613511, 41.90985891987164, 42.4205902884306, 42.855239174804616, 43.23628822269047, 43.57742648420294, 43.88763759546647, 44.173150839098106, 44.43847200490756, 44.68697127052955, 44.92123905550531, 45.1433120058616, 45.35482228047826, 45.55709948559664, 45.75124226274281, 45.93816979804823, 46.11865967541837, 46.293376215401395, 46.46289204333575, 46.62770474763841, 46.78824991724382, 46.944911468095555, 47.0980299120063, 47.247909044261995, 47.394821402243856, 47.53901275894208, 47.68070585136917, 47.82010349713697, 47.95739121783608, 48.09273946192363, 48.22630550020206, 48.35823505198176, 48.48866368846805, 48.617718050937555, 48.745516914241854, 48.87217212063709, 48.99778940454302, 49.12246912532807, 49.24630692240406, 49.36939430464876, 49.491819184342674, 49.61366636432289, 49.7350179858533, 49.85595394373554, 49.97655227439789, 50.09688952206855, 50.21704108764098, 50.33708156445307, 50.45708506491566, 50.57712554172775, 50.69727710730018, 50.81761435497085, 50.93821268563319, 51.059148643515435, 51.18050026504584, 51.30234744502606, 51.42477232471997, 51.54785970696467, 51.671697504040665, 51.79637722482571, 51.92199450873164, 52.04864971512688, 52.17644857843118, 52.30550294090068, 52.435931577386974, 52.56786112916667, 52.701427167445104, 52.83677541153265, 52.97406313223176, 53.11346077799956, 53.25515387042665, 53.399345227124876, 53.54625758510674, 53.696136717362435, 53.84925516127318, 54.005916712124915, 54.16646188173032, 54.33127458603298, 54.50079041396734, 54.67550695395036, 54.8559968313205, 55.042924366625925, 55.237067143772094, 55.43934434889047, 55.65085462350713, 55.87292757386342, 56.107195358839185, 56.35569462446117, 56.621015790270626, 56.90652903390227, 57.216740145165794, 57.55787840667826, 57.938927454564116, 58.37357634093813, 58.884307709497094, 59.51228637323362, 60.347223171936065, 61.6634748303168]
}
data = Data([trace1, trace2])
layout = {
  "title": {"text": "Quantile-Quantile Plot"}, 
  "width": 800, 
  "xaxis": {
    "title": {"text": "Theoritical Quantities"}, 
    "zeroline": False
  }, 
  "yaxis": {"title": {"text": "Sample Quantities"}}, 
  "height": 700, 
  "showlegend": False
}
fig = Figure(data=data, layout=layout)
plot_url = py.iplot(fig)
In [3]:
f = 'seqs_quals.fastq'
seqs = skbio.io.read(f, format='fastq', verify=False, variant='illumina1.8')
seq1 = seqs.__next__()
#seq1
#seq1.positional_metadata.quality[:10]
seqs = skbio.io.read(f, format='fastq', verify=False, variant='illumina1.8')

df = pd.DataFrame()
num_sequences = 500

for count, seq in enumerate(itertools.islice(seqs, num_sequences)):
    df[count] = seq.positional_metadata.quality
    
purd = cl.scales['11']['div']['RdYlBu']
purd40 = cl.interp(purd, 40)

traces = []
for e in range(len(df)):
    traces.append(go.Box(
        y=df.iloc[e].values,
        name=e,
        boxpoints='outliers',
        whiskerwidth=0.2,
        marker=dict(
            size=.1,
            color=purd40[int(round(df.iloc[e].mean(), 0))]
        ),
        line=dict(width=1),
    ))

layout = go.Layout(
    title='Quality Score Distributions',
    yaxis=dict(
        title='Quality Score',
        autorange=True,
        showgrid=True,
        zeroline=True,
        gridcolor='#d9d4d3',
        zerolinecolor='#d9d4d3',
    ),
    xaxis=dict(
        title='Base Position',
    ),

    font=dict(family='Times New Roman', size=16, color='#2e1c18'),
    paper_bgcolor='#eCe9e9',
    plot_bgcolor='#eCe9e9'
)

fig = go.Figure(data=traces, layout=layout)
py.iplot(fig, filename='quality-scores')
In [4]:
seqs = [DNA(e) for e in itertools.islice(skbio.io.read(f, format='fastq', variant='illumina1.8'), 2)]
aligned_seqs = global_pairwise_align_nucleotide(seqs[0], seqs[1])

msa = skbio.alignment.TabularMSA.read('msa10.fna', constructor=DNA)
base_dic = {'A': 1, 'C': .25, 'G': .5, 'T': .75, '-': 0}

def seq_align_for_plot(msa):
    base_text = [list(str(e)) for e in msa]
    base_values = np.zeros((len(base_text), len(base_text[0])))
    for i in range(len(base_text[0])):
        for j in range(len(base_text)):
            if base_text[j][i] != base_text[0][i]:
                base_values[j][i] = base_dic[base_text[j][i]]
    return(base_text, base_values)

base_text, base_values = seq_align_for_plot(msa)

colorscale=[[0.00, '#F4F0E4'], 
            [0.25, '#1b9e77'], 
            [0.50, '#d95f02'], 
            [0.75, '#7570b3'],
            [1.00, '#e7298a']]

seq_names = ["Seq " + str(e + 1) for e in range(len(base_text))]

fig = ff.create_annotated_heatmap(base_values, 
                                  annotation_text=base_text, 
                                  colorscale=colorscale)

fig['layout'].update(
    title="Aligned Sequences",
    xaxis=dict(ticks='', 
               side='top',
               ticktext=list(np.arange(0, len(base_text[0]), 10)),
               tickvals=list(np.arange(0, len(base_text[0]), 10)),
               showticklabels=True,
               tickfont=dict(family='Bookman', 
                             size=18, 
                             color='#22293B',
                            ),
              ),
    
    yaxis=dict(autorange='reversed',
               ticks='', 
               ticksuffix='  ',
               ticktext=seq_names,
               tickvals=list(np.arange(0, len(base_text))),
               showticklabels=True,
               tickfont=dict(family='Bookman', 
                         size=18, 
                         color='#22293B',
                            ),
              ),
    width=10000,
    height=450,
    autosize=True,
    annotations=dict(font=dict(family='Courier New, monospace',
                                size=14,
                                color='#3f566d'
                               ),
                      )
)
py.iplot(fig, filename='msa')
In [5]:
hamming_dm = DistanceMatrix.from_iterable(msa, metric=hamming, key='id')
colorscale = [[0.0,'#4bd4d1'], [0.5, '#1c9099']]  # custom colorscale
['#ece2f0','#a6bddb','#1c9099']
trace = go.Heatmap(x=seq_names, y=seq_names, z=hamming_dm.data, colorscale='Viridis')

fig = go.Figure(data=[trace])

fig['layout'].update(
    xaxis=dict(ticks=''),
    yaxis=dict(ticks='', ticksuffix='  '),
    width=700,
    height=700,
    autosize=False,
    margin=go.Margin(
        l=200,
        b=200,
        pad=4
    )
)
py.iplot(fig, filename='dm_heatmap')
In [6]:
hamming_array = hamming_dm.data + .001
np.fill_diagonal(hamming_array, 0)
names = hamming_dm.ids

group1 = ['Seq 1', 'Seq 7', 'Seq 9', 'Seq 3']
group2 = ['Seq 2', 'Seq 4', 'Seq 5', 'Seq 6', 'Seq 8']

names = ["Seq " + str(e + 1) for e in range(len(hamming_array))]
fig = ff.create_dendrogram(hamming_array.data, 
                           orientation='right',
                           labels=names)


fig['layout'].update(
    width=500,
    height=700,                     
)

fig['layout']['yaxis'].update(dict(side='right', 
                                   showline=False,
                                   ticks='',
                                   zeroline=True,
                                   zerolinewidth=4,
                                   zerolinecolor='#969696')
                              )

fig['layout']['xaxis'].update(dict(showline=False,
                                   showticklabels=True,
                                   ticktext=np.array(['0.0', '0.2', '0.4', '0.6', '0.8']),
                                   tickvals=[-0.8, -0.6, -0.4, -0.2, -0.0],
                                   tickfont=dict(color='#969696'),
                                   mirror=None,
                                   ticklen=8,
                                   tickwidth=4,
                                   tickcolor='#969696',
                                   range=[-0.9, 0.1])
                             )
tips = []
for i in fig['data']:
    if 0 in i['x']:
        i['marker']['color'] = '#ffbe4f'
    else:
        i['marker']['color'] = '#0c457d'

for i in range(len(names)):
    if fig['layout']['yaxis']['ticktext'][i] in group1:
        color = '#e8702a'
        symbol = 'circle'
    else:
        color = '#0ea7b5'
        symbol = 'triangle-left'
        
    x = [-0.0]
    y = [fig['layout']['yaxis']['tickvals'][i]]
    trace = go.Scatter(x=x, y=y, 
                       mode='markers',
                       marker=dict(
                            size=10,
                            color=color,
                            symbol=symbol)
                      )
    fig.add_trace(trace)

py.iplot(fig, filename='phylo-dendrogram')
In [7]:
# Dendrogram with a Heatmap

# get data
data = np.genfromtxt("http://files.figshare.com/2133304/ExpRawData_E_TABM_84_A_AFFY_44.tab",
                     names=True,usecols=tuple(range(1,30)),dtype=float, delimiter="\t")
data_array = data.view((np.float, len(data.dtype.names)))
data_array = data_array.transpose()
labels = data.dtype.names

# Initialize figure by creating upper dendrogram
fig = ff.create_dendrogram(data_array, orientation='bottom', labels=labels)
for i in range(len(fig['data'])):
    fig['data'][i]['yaxis'] = 'y2'

# Create Side Dendrogram
dendro_side = ff.create_dendrogram(data_array, orientation='right')
for i in range(len(dendro_side['data'])):
    dendro_side['data'][i]['xaxis'] = 'x2'

# Add Side Dendrogram Data to Figure
for data in dendro_side['data']:
    fig.add_trace(data)

# Create Heatmap
dendro_leaves = dendro_side['layout']['yaxis']['ticktext']
dendro_leaves = list(map(int, dendro_leaves))
data_dist = pdist(data_array)
heat_data = squareform(data_dist)
heat_data = heat_data[dendro_leaves,:]
heat_data = heat_data[:,dendro_leaves]

heatmap = [
    go.Heatmap(
        x = dendro_leaves,
        y = dendro_leaves,
        z = heat_data,
        colorscale = 'Blues'
    )
]

heatmap[0]['x'] = fig['layout']['xaxis']['tickvals']
heatmap[0]['y'] = dendro_side['layout']['yaxis']['tickvals']

# Add Heatmap Data to Figure
for data in heatmap:
    fig.add_trace(data)

# Edit Layout
fig.update_layout({'width':800, 'height':800,
                         'showlegend':False, 'hovermode': 'closest',
                         })
# Edit xaxis
fig.update_layout(xaxis={'domain': [.15, 1],
                                  'mirror': False,
                                  'showgrid': False,
                                  'showline': False,
                                  'zeroline': False,
                                  'ticks':""})
# Edit xaxis2
fig.update_layout(xaxis2={'domain': [0, .15],
                                   'mirror': False,
                                   'showgrid': False,
                                   'showline': False,
                                   'zeroline': False,
                                   'showticklabels': False,
                                   'ticks':""})

# Edit yaxis
fig.update_layout(yaxis={'domain': [0, .85],
                                  'mirror': False,
                                  'showgrid': False,
                                  'showline': False,
                                  'zeroline': False,
                                  'showticklabels': False,
                                  'ticks': ""
                        })
# Edit yaxis2
fig.update_layout(yaxis2={'domain':[.825, .975],
                                   'mirror': False,
                                   'showgrid': False,
                                   'showline': False,
                                   'zeroline': False,
                                   'showticklabels': False,
                                   'ticks':""})

# Plot!
fig.show()
In [9]:
# 3D scatter plot with plotly express

import plotly.express as px
iris = px.data.iris()
fig = px.scatter_3d(iris, x='sepal_length', y='sepal_width', z='petal_width',
              color='species')
fig.show()